{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Running Phi-3 model in MLFlow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---------\n",
    "This notebook describes the use of a custom Python wrapper on the example of Phi-3 mini 4K Instruct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing required packages\n",
    "import mlflow\n",
    "from mlflow.models import infer_signature\n",
    "import onnxruntime_genai as og"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Defining custom Python class for Phi-3 Mini 4K model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Defining custom PythonModel class\n",
    "class Phi3Model(mlflow.pyfunc.PythonModel):\n",
    "    def load_context(self, context):\n",
    "        # Retrieving model from the artifacts\n",
    "        model_path = context.artifacts[\"phi3-mini-onnx\"]\n",
    "        model_options = {\n",
    "             \"max_length\": 300,\n",
    "             \"temperature\": 0.2,         \n",
    "        }\n",
    "    \n",
    "        # Defining the model\n",
    "        self.phi3_model = og.Model(model_path)\n",
    "        self.params = og.GeneratorParams(self.phi3_model)\n",
    "        self.params.set_search_options(**model_options)\n",
    "        \n",
    "        # Defining the tokenizer\n",
    "        self.tokenizer = og.Tokenizer(self.phi3_model)\n",
    "\n",
    "    def predict(self, context, model_input):\n",
    "        # Retrieving prompt from the input\n",
    "        prompt = model_input[\"prompt\"][0]\n",
    "        self.params.input_ids = self.tokenizer.encode(prompt)\n",
    "\n",
    "        # Generating the model's response\n",
    "        response = self.phi3_model.generate(self.params)\n",
    "\n",
    "        return self.tokenizer.decode(response[0][len(self.params.input_ids):])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generating MLFlow artifact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024/06/18 14:17:34 INFO mlflow.models.utils: We convert input dictionaries to pandas DataFrames such that each key represents a column, collectively constituting a single row of data. If you would like to save data as multiple rows, please convert your data to a pandas DataFrame before passing to input_example.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1c43e3ebd0d74c18997ec6bddf560e47",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading artifacts:   0%|          | 0/10 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Generating the MLflow model with the custom Python model\n",
    "input_example = {\"prompt\": \"<|system|>You are a helpful AI assistant.<|end|><|user|>What is the capital of Spain?<|end|><|assistant|>\"}\n",
    "artifact_path = \"phi3_mlflow_model\"\n",
    "\n",
    "with mlflow.start_run() as run:\n",
    "    model_info = mlflow.pyfunc.log_model(\n",
    "        artifact_path = artifact_path,\n",
    "        python_model = Phi3Model(),\n",
    "        artifacts = {\n",
    "            \"phi3-mini-onnx\": \"cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4\",\n",
    "        },\n",
    "        input_example = input_example,\n",
    "        signature = infer_signature(input_example, [\"Run\"]),\n",
    "        extra_pip_requirements = [\"torch\", \"onnxruntime_genai\", \"numpy\"],\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running Phi-3 as MLFlow model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading Phi-3 MLFlow model\n",
    "loaded_model = mlflow.pyfunc.load_model(\n",
    "    model_uri = model_info.model_uri\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "inputs: \n",
       "  ['prompt': string (required)]\n",
       "outputs: \n",
       "  [string (required)]\n",
       "params: \n",
       "  None"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Checking MLFlow model's signature\n",
    "loaded_model.metadata.signature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Alright, here's a little atom-related joke for you!\n",
      "\n",
      "Why don't electrons ever play hide and seek with protons?\n",
      "\n",
      "Because good luck finding them when they're always \"sharing\" their electrons!\n",
      "\n",
      "Remember, this is all in good fun, and we're just having a little atomic-level humor!\n"
     ]
    }
   ],
   "source": [
    "# Testing the loaded model\n",
    "response = loaded_model.predict(\n",
    "    {\"prompt\": \"<|system|>You are a stand-up comedian.<|end|><|user|>Tell me a joke about atom<|end|><|assistant|>\",}\n",
    ")\n",
    "print(response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
